********************************************************************************
* DESCRIPTION: Merges data in memory with one or more connected sets, keeping only matched observations:
*              (1) connected set of employer IDs for AKM estimation;
*              (2) connected set of employer IDs for PageRank estimation;
*              (3) connected sets of employer IDs for AKM estimation and, separately, for PageRank estimation (two consecutive merges);
*              (4) connected set of employer IDs for AKM estimation and, jointly, for PageRank estimation (one merge);
*              (5) connected set of unique job IDs for AKM estimation;
*              (6) connected set of unique job IDs for PageRank estimation.
*
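* INPUTS:      - Data in memory and one passed argument: switch `1' selecting the connected set(s) to merge with:
*                  `1' == 1: connected set of employer IDs for AKM;
*                  `1' == 2: connected set of employer IDs for PageRank;
*                  `1' == 3: both connected sets of employer IDs, in two consecutive merges;
*                  `1' == 4: both connected sets of employer IDs, in one merge (faster);
*                  `1' == 5: connected set of unique job IDs for AKM;
*                  `1' == 6: connected set of unique job IDs for PageRank.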
*
* OUTPUTS:     - Data in memory.
*
********************************************************************************
********************************************************************************
* MAIN CODE
********************************************************************************
* switches: translate the single positional argument into one 0/1 flag per merge mode
local connected_set_mode `1'

* modes 1 and 3: merge with connected set of employer IDs for AKM estimation
local merge_with_connected_akm = inlist(`connected_set_mode', 1, 3)

* modes 2 and 3: merge with connected set of employer IDs for PageRank estimation
local merge_with_connected_pagerank = inlist(`connected_set_mode', 2, 3)

* mode 4: merge with connected sets of employer IDs for AKM and PageRank estimations
* in one step, rather than two consecutive steps (faster)
local merge_with_both = inlist(`connected_set_mode', 4)

* mode 5: merge with connected set of unique job IDs for AKM
local merge_with_workers_akm = inlist(`connected_set_mode', 5)

* mode 6: merge with connected set of unique job IDs for PageRank
local merge_with_workers_pagerank = inlist(`connected_set_mode', 6)

* merge with connected sets of employer IDs (AKM and PageRank flags set by the switches)
*
* The merges below use keep(master match) rather than keep(match), so unmatched
* observations stay in memory until both match counts have been taken. This way
* each per-set count is measured against the full pre-merge data. If the first
* merge already dropped its unmatched rows, the PageRank count taken afterwards
* would be the cumulative AKM-and-PageRank figure, not the PageRank-only figure
* that the report message announces. The actual restriction is applied by the
* keep-if statements at the end of this section; the resulting data are the same
* as with two consecutive keep(match) merges.
*
* Only the key variables are listed in keepusing(), so no using-data variables
* other than the merge indicators are added.
qui count
local N_before = r(N)
if `merge_with_connected_akm' {
	if $connect_by_categ merge m:1 ${empid_var} ${categ_var} using "${TEMP_DIR}/connected_akm_${categ_var}_${year_min}_${year_max}.dta", keep(master match) keepusing(${empid_var} ${categ_var}) generate(merge_akm)
	else merge m:1 ${empid_var} using "${TEMP_DIR}/connected_akm_${year_min}_${year_max}.dta", keep(master match) keepusing(${empid_var}) generate(merge_akm)
	* observations whose employer is in the AKM connected set
	qui count if merge_akm == 3
	local N_match_with_akm = r(N)
	local N_dropped_akm = `N_before' - `N_match_with_akm'
	local N_dropped_akm_perc: di %5.2f 100*`N_dropped_akm'/`N_before'
}
if `merge_with_connected_pagerank' {
	if $connect_by_categ merge m:1 ${empid_var} ${categ_var} using "${TEMP_DIR}/connected_pagerank_${categ_var}_${year_min}_${year_max}.dta", keep(master match) keepusing(${empid_var} ${categ_var}) generate(merge_pagerank)
	else merge m:1 ${empid_var} using "${TEMP_DIR}/connected_pagerank_${year_min}_${year_max}.dta", keep(master match) keepusing(${empid_var}) generate(merge_pagerank)
	* observations whose employer is in the PageRank connected set,
	* counted independently of the AKM restriction
	qui count if merge_pagerank == 3
	local N_match_with_pagerank = r(N)
	local N_dropped_pagerank = `N_before' - `N_match_with_pagerank'
	local N_dropped_pagerank_perc: di %5.2f 100*`N_dropped_pagerank'/`N_before'
}
* apply the restrictions only after all counts have been taken
if `merge_with_connected_akm' {
	keep if merge_akm == 3
	drop merge_akm
}
if `merge_with_connected_pagerank' {
	keep if merge_pagerank == 3
	drop merge_pagerank
}
if `merge_with_both' {
	* Single-step restriction to the joint AKM and PageRank connected set of
	* employer IDs: only matched observations are kept, and only the key
	* variables are requested from the using data, so no variables are added.
	if $connect_by_categ {
		merge m:1 ${empid_var} ${categ_var} ///
			using "${TEMP_DIR}/connected_both_${categ_var}_${year_min}_${year_max}.dta", ///
			keep(match) keepusing(${empid_var} ${categ_var}) nogenerate
	}
	else {
		merge m:1 ${empid_var} ///
			using "${TEMP_DIR}/connected_both_${year_min}_${year_max}.dta", ///
			keep(match) keepusing(${empid_var}) nogenerate
	}
}
if `merge_with_workers_akm' {
	* Restriction to the AKM connected set of unique job IDs: the merge key is
	* id_unique in both cases; only the using file name depends on whether
	* connectedness was determined by category.
	if $connect_by_categ {
		merge m:1 id_unique ///
			using "${TEMP_DIR}/connected_akm_workers_${categ_var}_${year_min}_${year_max}.dta", ///
			keep(match) keepusing(id_unique) nogenerate
	}
	else {
		merge m:1 id_unique ///
			using "${TEMP_DIR}/connected_akm_workers_${year_min}_${year_max}.dta", ///
			keep(match) keepusing(id_unique) nogenerate
	}
}
if `merge_with_workers_pagerank' {
	* Restriction to the PageRank connected set of unique job IDs. Only matched
	* observations are kept and only the key variables are requested from the
	* using data, so no variables are added.
	if $pagerank_monthly {
		* monthly PageRank: the merge key is the pair year_month and id_unique;
		* keepusing() lists the full key in both branches for consistency
		if $connect_by_categ merge m:1 year_month id_unique using "${TEMP_DIR}/connected_pagerank_workers_${categ_var}_${year_min}_${year_max}.dta", keep(match) keepusing(year_month id_unique) nogen
		else merge m:1 year_month id_unique using "${TEMP_DIR}/connected_pagerank_workers_${year_min}_${year_max}.dta", keep(match) keepusing(year_month id_unique) nogen
	}
	else {
		* non-monthly PageRank: the merge key is id_unique alone
		if $connect_by_categ merge m:1 id_unique using "${TEMP_DIR}/connected_pagerank_workers_${categ_var}_${year_min}_${year_max}.dta", keep(match) keepusing(id_unique) nogen
		else merge m:1 id_unique using "${TEMP_DIR}/connected_pagerank_workers_${year_min}_${year_max}.dta", keep(match) keepusing(id_unique) nogen
	}
}
* report how many observations each restriction removed

* the overall drop is computed when both employer-level sets were applied
* (in two steps or in one) or when a job-level set was applied
local report_total = (`merge_with_connected_akm' & `merge_with_connected_pagerank') | `merge_with_both' | `merge_with_workers_akm' | `merge_with_workers_pagerank'
if `report_total' {
	quietly count
	local N_after = r(N)
	local N_dropped_total = `N_before' - `N_after'
	local N_dropped_total_perc : display %5.2f 100*`N_dropped_total'/`N_before'
}

* per-set messages for the employer-level AKM and PageRank restrictions
if `merge_with_connected_akm' {
	display "--> Restriction to only connected set of employer IDs for AKM dropped `N_dropped_akm' (`N_dropped_akm_perc'%) out of `N_before' observations."
}
if `merge_with_connected_pagerank' {
	display "--> Restriction to only connected set of employer IDs for PageRank dropped `N_dropped_pagerank' (`N_dropped_pagerank_perc'%) out of `N_before' observations."
}

* combined message for both employer-level sets
if (`merge_with_connected_akm' & `merge_with_connected_pagerank') | `merge_with_both' {
	display "--> Restriction to both connected sets of employer IDs dropped `N_dropped_total' (`N_dropped_total_perc'%) out of `N_before' observations."
}

* message for the job-level sets
if `merge_with_workers_akm' | `merge_with_workers_pagerank' {
	display "--> Restriction to connected set of unique job IDs dropped `N_dropped_total' (`N_dropped_total_perc'%) out of `N_before' observations."
}




********************************************************************************
* RETURNING TO MAIN CODE
********************************************************************************
